######### Observable Bounds of Rationality and Credibility, Journal of Politics ###########

####### Data Pre-Processing Pipeline #########

####  Set WD ####
# All file names used below (raw Excel input, intermediate .rda, final .Rda) are
# relative, so they resolve against the working directory set here.
setwd() # Placeholder: fill in the path to the folder holding the replication files before running

#### Load Data and Packages ####
library(tidyverse)
library(plyr)
library(dplyr)
library(readxl)
library(writexl)
library(tidylog)

# Read the raw survey export and coerce it to a plain data.frame.
Connect <- as.data.frame(read_excel("CredibilityJOP_RawData.xlsx"))

# Drop duplicates. The two commented lines below were run once to flag repeated
# IP addresses; IPAddress was then omitted from the data to protect respondent
# confidentiality, so only the resulting duplicateIP flag is available here.
#Connect$duplicateIP <- ifelse(duplicated(Connect$IPAddress), 1, 0) # Note: in the data, we omit respondent IPAddress.
#write_xlsx(Connect, "CredibilityJOP_RawData.xlsx")

# Without this check a missing flag column would make the comparison below
# evaluate to logical(0) and silently drop every respondent.
if (!"duplicateIP" %in% names(Connect)) {
  stop("Column 'duplicateIP' not found in CredibilityJOP_RawData.xlsx", call. = FALSE)
}

# which() keeps only rows whose flag is exactly 0; a bare logical index would
# insert an all-NA row for every respondent whose flag is NA.
Connect <- Connect[which(Connect$duplicateIP == 0), ]

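# ID Bad Responses. Note that this package (rIP) was removed, so the lines below are kept
# commented out for reference only. The IPHub API key has been redacted; supply your own
# key (for example through an environment variable) if you re-run this step.
#library(rIP)
#VSP <- getIPinfo(Connect, "IPAddress", iphub_key = Sys.getenv("IPHUB_KEY"))

# Merge the IP screening results and drop respondents flagged for blocking
#Connect <- merge(Connect, VSP, by="IPAddress")
#Connect <- subset(Connect, IP_Hub_recommend_block == 0)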

# Pre-treatment attention check: flag respondents who picked the correct
# description of the scenario (1 = passed, 0 = failed) and keep only those who
# passed. Respondents who fail are screened out of the survey.
correct_manipcheck <-
  "Country B claimed that a piece of territory in Country A rightfully belonged to them"

Connect <- Connect %>%
  mutate(manipcheck_success = if_else(manipcheck == correct_manipcheck, 1, 0)) %>%
  filter(manipcheck_success == 1)

#### Rename Variables ####
# Copy the survey-platform column names to short, sequentially numbered names.
# The original columns are left in place; the new ones are appended.
Connect[["Duration"]] <- Connect[["Duration (in seconds)"]]

# Page-submit timers, listed in profile order: profile 1 uses the "cj1" timer,
# profiles 2-10 the plain numbered timers, 11-15 the "l" timers and 16-20 the
# "s" timers.
timer_sources <- c(
  "timer_cj1_Page Submit",
  paste0("timer_", 2:10, "_Page Submit"),
  paste0("timer_l", 1:5, "_Page Submit"),
  paste0("timer_s", 1:5, "_Page Submit")
)
for (k in seq_along(timer_sources)) {
  Connect[[paste0("timer", k)]] <- Connect[[timer_sources[k]]]
}

# Renumber the two follow-up outcome sets so their suffix equals the profile
# number: dv_l1..dv_l5 -> dv_l11..dv_l15 and dv_s1..dv_s5 -> dv_s16..dv_s20.
for (k in 1:5) {
  Connect[[paste0("dv_l", 10 + k)]] <- Connect[[paste0("dv_l", k)]]
}
for (k in 1:5) {
  Connect[[paste0("dv_s", 15 + k)]] <- Connect[[paste0("dv_s", k)]]
}



###### Function to process all profiles, noting that 1 through 10 are main experiment, 11 through 15 are skill mechanism test, 16 through 20 are difficulty mechanism check #######



# Strip the label from "label: value" strings.
#
# x: character vector.
# Returns x with everything up to and including the first colon (plus one
# optional following space) removed; elements without a colon are unchanged.
remove_before_colon <- function(x) {
  sub("^[^:]*: ?", "", x)
}

# ---- Parse the conjoint profiles into long format ----
# Profiles 1-10 carry all eleven attributes and the main outcome (dv_eval).
# Profiles 11-15 carry only the six leader attributes and dv_skill.
# Profiles 16-20 carry only the five situation attributes and dv_difficulty.

leader_traits <- c('iv_age', 'iv_leaderexp', 'iv_foreignexp', 'iv_militaryexp',
                   'iv_academics', 'iv_anger')
situation_traits <- c('iv_intel', 'iv_fatigue', 'iv_illness', 'iv_stress',
                      'iv_milstrength')
all_traits <- c(leader_traits, situation_traits)

# Build the long-format rows for one conjoint profile.
#
# survey:       respondent-level data frame (one row per respondent).
# profile:      profile number; selects columns traits<profile> and timer<profile>.
# dv_prefix:    prefix of the outcome column, e.g. "dv_" -> column dv_<profile>.
# dv_name:      which of dv_eval / dv_skill / dv_difficulty receives the outcome;
#               the other two are filled with NA.
# shown_traits: names of the attributes present in this profile's string, in order.
# trait_order:  full set of attribute columns to return; absent ones are NA.
#
# Returns a data frame with one row per respondent and the columns in
# trait_order, then timer, dv_eval, dv_skill, dv_difficulty, leader_order,
# situation_order and profile.
parse_profile <- function(survey, profile, dv_prefix, dv_name, shown_traits,
                          trait_order = all_traits) {
  trait_col <- paste0("traits", profile)
  timer_col <- paste0("timer", profile)
  dv_col <- paste0(dv_prefix, profile)

  # Fail early with a clear message instead of a column-count mismatch later on.
  absent_cols <- setdiff(c(trait_col, timer_col, dv_col), names(survey))
  if (length(absent_cols) > 0) {
    stop("Profile ", profile, ": missing column(s) ",
         paste(absent_cols, collapse = ", "), call. = FALSE)
  }

  # Split each profile string on commas: one row per respondent, one column per field.
  parsed <- do.call(rbind.data.frame, strsplit(survey[[trait_col]], ","))
  if (ncol(parsed) != length(shown_traits)) {
    stop("Profile ", profile, ": expected ", length(shown_traits),
         " comma-separated fields but found ", ncol(parsed), call. = FALSE)
  }
  colnames(parsed) <- shown_traits

  # Keep only the value part of each "label: value" field. sub() is vectorised,
  # so the helper is applied to whole columns.
  parsed[] <- lapply(parsed, function(col) {
    if (is.character(col)) remove_before_colon(col) else col
  })

  # The closing brace of the profile string ends up on the last parsed field.
  last_trait <- shown_traits[length(shown_traits)]
  parsed[[last_trait]] <- gsub("}", "", parsed[[last_trait]], fixed = TRUE)

  # Attributes not shown in this profile are added as NA so that every profile
  # has the same columns.
  for (absent_trait in setdiff(trait_order, shown_traits)) {
    parsed[[absent_trait]] <- NA
  }
  parsed <- parsed[trait_order]

  # Timer, outcomes and ordering variables from the respondent-level data.
  parsed$timer <- survey[[timer_col]]
  parsed$dv_eval <- NA
  parsed$dv_skill <- NA
  parsed$dv_difficulty <- NA
  parsed[[dv_name]] <- survey[[dv_col]]
  parsed$leader_order <- survey$leaderOrder
  parsed$situation_order <- survey$situationOrder
  parsed$profile <- profile

  parsed
}

# One entry per group of profiles that share the same layout.
profile_specs <- list(
  list(profiles = 1:10,  dv_prefix = "dv_",  dv_name = "dv_eval",
       shown = all_traits),
  list(profiles = 11:15, dv_prefix = "dv_l", dv_name = "dv_skill",
       shown = leader_traits),
  list(profiles = 16:20, dv_prefix = "dv_s", dv_name = "dv_difficulty",
       shown = situation_traits)
)

# all_profiles[[i]] holds the parsed rows of profile i.
all_profiles <- vector("list", 20)
for (spec in profile_specs) {
  for (i in spec$profiles) {
    all_profiles[[i]] <- parse_profile(Connect, i, spec$dv_prefix, spec$dv_name,
                                       spec$shown)
  }
}

# One copy of the respondent-level data per profile, in the same order, so the
# two stacks line up row by row when they are bound together.
all_mt <- rep(list(Connect), length(all_profiles))

# Stack the per-profile pieces: parsed conjoint rows in one frame and the
# matching copies of the respondent-level data in another.
conjoint_all <- do.call(rbind, all_profiles)
Connect_all <- do.call(rbind, all_mt)

# Bind the two stacks side by side, keep the analysis variables in their final
# order, and store every conjoint attribute (iv_*) as character.
Connect_Parsed <- cbind(Connect_all, conjoint_all) %>%
  select(
    ResponseId, DistributionChannel, contains("Date"), Duration, timer,
    profile, dv_eval, dv_skill, dv_difficulty, starts_with("iv_"),
    gender, race, educ, mil_self, mil_fam, married, children, age,
    contains("pid"), dstr, rstr, lean, ideo
  ) %>%
  mutate(across(starts_with("iv_"), as.character))

# Attribute columns whose values still carry quote characters
# (iv_age is not included).
iv_columns <- c("iv_leaderexp", "iv_foreignexp", "iv_militaryexp", "iv_academics",
                "iv_anger", "iv_intel", "iv_fatigue", "iv_illness", "iv_stress",
                "iv_milstrength")

# Delete single quotes, double quotes and backticks from the given columns.
#
# df:      data frame.
# columns: character vector of column names to clean.
# Returns df with the listed columns cleaned; other columns are untouched.
remove_quotes_iv <- function(df, columns) {
  strip_one <- function(frame, col) {
    frame[[col]] <- gsub('[\'"`]', '', frame[[col]])
    frame
  }
  # Fold over the column names, cleaning one column per step.
  Reduce(strip_one, columns, df)
}

# Strip quote characters from the attribute columns listed in iv_columns.
Connect_Parsed <- remove_quotes_iv(Connect_Parsed, iv_columns)

# Remove curly and straight apostrophes from the stress attribute.
Connect_Parsed[["iv_stress"]] <- gsub("[‘’']", "", Connect_Parsed[["iv_stress"]])

# Remove the closing brace left on the anger attribute.
Connect_Parsed[["iv_anger"]] <- gsub('}', '', Connect_Parsed[["iv_anger"]])

#### Save parsed data and clear the workspace ####
save(Connect_Parsed, file = "Connect_Parsed.rda")
rm(list = ls())



#### Re-load data to recode variables ####
library(scales)

# Load the saved object into a scratch environment and keep it under the name
# Connect; nothing else is left in the workspace.
reloaded <- new.env()
load("Connect_Parsed.rda", envir = reloaded)
Connect <- reloaded$Connect_Parsed
rm(reloaded)

# Timing variables: full-survey duration converted from seconds to minutes
# (one decimal) and the per-profile timer rounded to whole seconds.
Connect$Duration <- round(as.numeric(Connect$Duration) / 60, digits = 1)
Connect$timer <- round(as.numeric(Connect$timer), digits = 0)


# Outcome variables: map each response label onto an evenly spaced 0-1 scale.
# Each lookup vector is spliced into recode() as label = value pairs.

likelihood_scale <- c('Extremely unlikely' = 0,
                      'Very unlikely' = .2,
                      'Somewhat unlikely' = .4,
                      'Somewhat likely' = .6,
                      'Very likely' = .8,
                      'Extremely likely' = 1)
Connect$dv_eval <- recode(Connect$dv_eval, !!!likelihood_scale)

skill_scale <- c('Not at all skilled' = 0,
                 'A little bit skilled' = .25,
                 'Moderately skilled' = .5,
                 'Very skilled' = .75,
                 'Extremely skilled' = 1)
Connect$dv_skill <- recode(Connect$dv_skill, !!!skill_scale)

difficulty_scale <- c('Not at all difficult' = 0,
                      'A little bit difficult' = .25,
                      'Moderately difficult' = .5,
                      'Very difficult' = .75,
                      'Extremely difficult' = 1)
Connect$dv_difficulty <- recode(Connect$dv_difficulty, !!!difficulty_scale)


# Leader age: ages below 60 are coded 1 and ages above 60 are coded 0.
# A value of exactly 60, or a missing value, is left as it is.
leader_age <- as.numeric(Connect$iv_age)
below_sixty <- !is.na(leader_age) & leader_age < 60
above_sixty <- !is.na(leader_age) & leader_age > 60
leader_age[below_sixty] <- 1
leader_age[above_sixty] <- 0
Connect$iv_age <- leader_age

# Leader experience: tenures given in months are coded 0 (low), tenures given
# in years are coded 1 (high).
leaderexp <- distinct(Connect, iv_leaderexp)

tenure_codes <- c('2 months' = 0, '3 months' = 0, '4 months' = 0,
                  '5 months' = 0, '6 months' = 0,
                  '4 years' = 1, '5 years' = 1, '6 years' = 1,
                  '7 years' = 1, '8 years' = 1)
Connect$iv_leaderexp <- recode(Connect$iv_leaderexp, !!!tenure_codes)



# Prior foreign policy experience and prior military service share the same
# coding: No = 0, Yes = 1.
yes_no_codes <- c('No' = 0, 'Yes' = 1)
Connect$iv_foreignexp <- recode(Connect$iv_foreignexp, !!!yes_no_codes)
Connect$iv_militaryexp <- recode(Connect$iv_militaryexp, !!!yes_no_codes)

# Academic performance on a 0 (worst) to 1 (best) scale. The labels are matched
# exactly as written: 'Very Poor' has a capital P, 'Very good' a lower-case g.
academic_codes <- c('Terrible' = 0,
                    'Very Poor' = .2,
                    'Poor' = .4,
                    'Good' = .6,
                    'Very good' = .8,
                    'Excellent' = 1)
Connect$iv_academics <- recode(Connect$iv_academics, !!!academic_codes)

# Anger (1 = angry, 0 = not angry)
Connect$iv_anger <- as.character(Connect$iv_anger)

# Print the distinct labels as a check. The column is referenced by its bare
# name so the result has a column called iv_anger, in line with the other
# diagnostics in this script.
anger <- Connect %>% distinct(iv_anger)
print(anger)

# Both capitalisations of each label are mapped to the same code.
Connect$iv_anger <- recode(Connect$iv_anger,
                         "Not visibly angry" = 0,
                         "Not Visibly Angry" = 0,
                         "Visibly Angry" = 1,
                         "Visibly angry" = 1)

# Intel on Country B (0 = extremely unreliable, 1 = extremely reliable)
intel <- distinct(Connect, iv_intel)
print(intel)

intel_codes <- c('Extremely unreliable' = 0,
                 'Very unreliable' = .2,
                 'Somewhat unreliable' = .4,
                 'Somewhat reliable' = .6,
                 'Very reliable' = .8,
                 'Extremely reliable' = 1)
Connect$iv_intel <- recode(Connect$iv_intel, !!!intel_codes)

# Leader fatigue (0 = either fatigue condition, 1 = no fatigue)
fatigue_codes <- c('No fatigue' = 1,
                   'Visibly tired and fatigued' = 0,
                   'Jet lagged from international travel' = 0)
Connect$iv_fatigue <- recode(Connect$iv_fatigue, !!!fatigue_codes)

# Leader illness (0 = either illness condition, 1 = no illness)
class(Connect$iv_illness)
illness <- distinct(Connect, iv_illness)

illness_codes <- c('No illness' = 1,
                   'Mild case of flu' = 0,
                   'Mild case of COVID-19' = 0)
Connect$iv_illness <- recode(Connect$iv_illness, !!!illness_codes)

# Leader stress (0 = any of the four family-stress conditions, 1 = no other stress)
class(Connect$iv_stress)
stress <- distinct(Connect, iv_stress)
print(stress)

stress_codes <- c("No other stress" = 1,
                  "Leaders child recently hospitalized" = 0,
                  "Leaders child recently injured in accident" = 0,
                  "Leaders spouse recently hospitalized" = 0,
                  "Leaders spouse recently injured in accident" = 0)
Connect$iv_stress <- recode(Connect$iv_stress, !!!stress_codes)

# Military strength on a seven-point 0 (much weaker) to 1 (much stronger) scale
class(Connect$iv_milstrength)
mils <- distinct(Connect, iv_milstrength)
print(mils)

strength_codes <- c('Much weaker' = 0,
                    'Somewhat weaker' = .167,
                    'Slightly weaker' = .333,
                    'About the same' = .5,
                    'Slightly stronger' = .667,
                    'Somewhat stronger' = .833,
                    'Much stronger' = 1)
Connect$iv_milstrength <- recode(Connect$iv_milstrength, !!!strength_codes)


# Store the fully recoded data under the name `dat` and write it to disk.
dat <- Connect

save(dat, file = "CredibilityJOP_CleanData.Rda")





